Introduction

This Mark Down file contains code and results evaluating the effect of wildfire smoke on cardiopulmonary morbidity and mortality along the Colorado front range communities from 2010 to 2015. The hypothesis for this study is that we will see an increase in the risk of cardiopulmonary morbidity and mortality associated with an increase in PM2.5 due to wildfire smoke.

Study Area Map

Study bounding box.

# clip by bbox function ------
bbox_clip <- function(sf, bbox) {
  # find the CRS of the sf object
  crs <- sf::st_crs(sf)$proj4string
  # create matrix
  x <- c(bbox[1], bbox[1], bbox[3], bbox[3], bbox[1])
  y <- c(bbox[2], bbox[4], bbox[4], bbox[2], bbox[2])
  coords <- matrix(cbind(x, y), ncol=2)
  # create polygon and assign same coord crs as sf object
  coords_poly <- sp::Polygon(coords)
  bbox_poly <- sp:: SpatialPolygons(list(sp::Polygons(list(coords_poly),
    ID = "bbox")), proj4string = sp::CRS(crs))
  # convert to sf feature
  bbox_sf <- st_as_sf(bbox_poly)
  # clip sf object
  clipped_sf <- sf[bbox_sf,]
  return(clipped_sf)
}

# clipping bounding box
study_bbox <- st_bbox(c(xmin=-105.3, xmax=-104.5, ymax=41, ymin = 38))

Reading in county shapefiles.

# county subset
county_sub_name <- c("Larimer", "Weld", "Boulder", "Broomfield", "Adams", 
                "Denver", "Jefferson", "Arapahoe", "Douglas", "El Paso",
                "Pueblo")

# read in county shapefile and subset to only colorado fips
co_county_sf <- st_read("../../data/shapefiles/us_county", 
                        layer = "us_county") %>%
  # limit to colorado
  filter(STATEFP == "08") %>% 
  # create fips variable
  mutate(fips = paste0(STATEFP, COUNTYFP))
## Reading layer `us_county' from data source `/Users/ganr1/Documents/public_repo/colorado_wildfire/data/shapefiles/us_county' using driver `ESRI Shapefile'
## Simple feature collection with 3108 features and 9 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.7631 ymin: 24.5231 xmax: -66.9499 ymax: 49.38436
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs
# save wgs84 crs
wgs <- st_crs(co_county_sf)

# county clip
county_clip <- bbox_clip(co_county_sf, study_bbox)

# extract county names
county_text <- county_clip %>% 
  st_transform(wgs) %>% 
  st_centroid() %>% 
  st_coordinates() %>% 
  cbind(., as.character(county_clip$NAME)) %>%
  as_data_frame() %>% 
  rename(lon = X, lat = Y, county = V3) %>% 
  mutate(lon = as.numeric(lon), lat = as.numeric(lat))

Reading in Front Range Grid points that will define who is included in the study area.

# read in front range grid csv
frontrange_grid <- read_csv('../../data/smoke/front_range_grid.csv') 

# read in grid simple features and limit to the front range grid
study_grid <- read_sf('../../data/shapefiles/co_krig_grid/', crs = wgs) %>% 
  filter(GRID_ID %in% frontrange_grid$GRID_ID)

nrow(frontrange_grid)
## [1] 75
# print out bounding box for front range grid
ggplot(frontrange_grid, 
       aes(x=lon, y=lat, label=paste(round(lon,2), round(lat,2), sep=', '))) +
  geom_text(size = 4) +
  xlim(-105.5,-104.4)

Note 2020-04-18:

Ideally I’d like to give the exact coordinates for the bounding box for Kate, but I don’t remember how to exacly subset. I’ve printed out the Lon/Lat coords of the grid cells I used in air pollution measures for the front range. Keep in mind I projected it as a WGS 84 to align with other shapefiles. But it looks like the bounding box cells would be Top left: -105.39, 40.63 Top right: -104.62, 40.67 Bottom left: -105.24, 38.59 Bottom right: -104.49, 38.62

Interstate shapefile limited to I-25 and I-70.

# read in colorado roads shapefile
interstate <- st_read(paste0("../../data/shapefiles/tl_2015_08_prisecroads"), 
                      layer = "tl_2015_08_prisecroads") %>% 
  # filter to I25 lines
  filter(RTTYP == "I") %>% 
  st_transform(crs = wgs)
## Reading layer `tl_2015_08_prisecroads' from data source `/Users/ganr1/Documents/public_repo/colorado_wildfire/data/shapefiles/tl_2015_08_prisecroads' using driver `ESRI Shapefile'
## Simple feature collection with 3230 features and 4 fields
## geometry type:  LINESTRING
## dimension:      XY
## bbox:           xmin: -109.0602 ymin: 36.99251 xmax: -102.0417 ymax: 41.00307
## epsg (SRID):    4269
## proj4string:    +proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs
# interstate clip
i_clip <- bbox_clip(interstate, study_bbox)

Reading in city boundary simple features.

# read in city polygons 2010
city <- st_read("../../data/shapefiles/Colorado_City_Point_Locations/", 
                layer = "Colorado_City_Point_Locations") %>% 
  st_transform(crs = wgs)
## Reading layer `Colorado_City_Point_Locations' from data source `/Users/ganr1/Documents/public_repo/colorado_wildfire/data/shapefiles/Colorado_City_Point_Locations' using driver `ESRI Shapefile'
## Simple feature collection with 587 features and 8 fields
## geometry type:  POINT
## dimension:      XY
## bbox:           xmin: -109.0146 ymin: 37.00304 xmax: -102.0804 ymax: 40.98833
## epsg (SRID):    4269
## proj4string:    +proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs
# limit cities 
city_points <- city %>% 
  filter(NAME %in% c("FORT COLLINS", "PUEBLO", "GREELEY", "BOULDER", "DENVER",
                     "COLORADO SPRINGS")) %>% 
  mutate(city = stringr::str_to_title(NAME))

Reading 2015 population estimate geotiff for Colorado. I made this file for the ALA project from the SEDAC 2015 global estimate.

# read colorado 2015 population raster
co_pop_2015 <- raster::raster("../../data/shapefiles/2015-ColoradoPopDensity.tif")
# extract bbox of clipped county subset
fr_extent <- raster::extent(st_bbox(county_clip)[c(1,3,2,4)])
# raster
fr_pop_2015 <- raster::crop(co_pop_2015, fr_extent)

# i'm going to create a shapefile/simple features; easier to plot
front_range_pop_sf <- st_as_sf(raster::rasterToPolygons(fr_pop_2015)) %>% 
  rename(popden = X2015.ColoradoPopDensity) %>% 
  # filtering to cells > 100 
  filter(popden > 100)

# cut once more to county shapefile so that populations outside the counties are not present
pop_clip <- front_range_pop_sf[county_clip,]

Map of Study Area

Study map that will contain an inset map to highlight the area of interest.

Inset map.

# create county inset map
inset_map <- ggplot(co_county_sf) +
  geom_sf(fill = "transparent", color = "black", size = 0.1) +
  geom_sf(data = study_grid, 
          fill = 'red', 
          color = 'transparent', 
          alpha = 0.5) +
  ggtitle("Colorado: Study Area") +
  theme(plot.title = element_text(size = 12),
        panel.background = element_blank(),
        panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        plot.background = element_rect(fill = "transparent", 
                                       color = "transparent"))
inset_map

Study map.

# plot map
study_map <-ggplot(pop_clip) +
  # start with plot of simple features of populations
  geom_sf(aes(fill=popden), color = NA) +
  # define aesthetics of poipulation
    scale_fill_gradient(name = expression("Population per km"^2), 
      low = "#26d0ce", high = "#1a2980") +
  # plot lines of counties
  geom_sf(data = county_clip, color = "black", 
          fill = "transparent", size = 0.5) +
  # plot i-25 and i-70
  geom_sf(data = i_clip, aes(color = "Interstate"), show.legend = "line") +
  # plot study grid
  geom_sf(data = study_grid, aes(color = "Study Grid"), fill = "transparent", 
          size = 0.1, show.legend = "line") +
  # custom colors for interstate and study grid
  scale_color_manual(values = c("Interstate" = "#0f9b0f", 
                                "Study Grid" = "red"), 
    labels = c("Interstate", "Study Grid"),
    name = "Boundary") + 
  # major city points and names
  geom_point(data = city_points, aes(x = LONG, y = LAT), color = "#3f2b96") +
  geom_text(data = city_points, aes(x = LONG, y = LAT, label = city), 
             color = "#3f2b96", size = 4, hjust = 1, vjust = -0.6) +
  # theme
  theme(panel.background = element_blank(),
        panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        legend.key = element_rect(fill = NA, colour = NA, size = 0.25))

study_map

Final map with inset. Using the grid package.I may add the AQS monitors to this map.

# save map
#tiff(filename = "../analysis/figures/study_map.tiff", w=850, h=550)
grid::grid.newpage()
vpmain <- grid::viewport(width = 1, height = 1, x = 0.5, y = 0.5)
vpinset <- grid::viewport(width = 0.25, height = 0.25, x = 0.8, y = 0.85)
print(study_map, vp = vpmain)
print(inset_map, vp = vpinset)

Smoke PM2.5

Kate has estimated PM2.5 at 15x15 km grid level using kriged surface site monitors for the entire US from 2010 to 2015. I have limited the gridded PM2.5 to Colorado Front Range study grid. I have also estimated population-weighted PM2.5 for all Colorado counties (although I don’t think I’ll use it for this project). There were concerns about the accuracy of the krig PM in areas outside of the front range communities since most PM stations are on the Front Range.

I am also going to set up this lag function here to create lagged days that will be used in the analysis.

# defining a lag function that I will use later in the distributed lag model
funlag <- function(var, n=5){
  var <- enquo(var)
  indices <- seq_len(n)
  map( indices, ~quo(lag(!!var, !!.x)) ) %>% 
    set_names(sprintf("%s_lag%d", rlang::quo_text(var), indices))
}

I’d like to find the most likely grid for each county and plot each grid for each county instead of the population-weighted county estimates. I’m going to find the county name for which has the largest proportion of intersection of the grid.

# identifying the county that most of the grid lies in
grid_county_id <- st_intersection(study_grid, county_clip) %>% 
  dplyr::select(GRID_ID, NAME, fips) 

# proportion each grid in county
grid_prop <- as.numeric(st_area(grid_county_id)/st_area(study_grid))

# find the county id that contains the highest proportion for each grid
grid_county_final <- grid_county_id %>% 
  mutate(prop_int = grid_prop,
         GRID_ID = as.character(GRID_ID)) %>% 
  group_by(GRID_ID) %>% 
  filter(prop_int == max(prop_int))

# remove geometry
st_geometry(grid_county_final) <- NULL

Consider removing this following section.

# front range fips
fr_fips <- c("08001", "08005", "08013", "08031", "08035", "08041",
             "08059", "08069", "08123")

# read pm data 
co_pm <- read_csv("../../data/smoke/1015-county_popwt_pm.csv") %>% 
  # limiting to colorado counties only by reading 1st 2 digs of 5-dig fips code
  filter(fips %in% fr_fips) %>% 
  # renaming variable pm_smk to pm_diff; more accurate description of var
  rename(pm_diff = pm_smk) %>% 
  # us
  mutate(county_name = case_when(fips == '08001' ~ 'Adams',
                                 fips == '08005' ~ 'Arapahoe',
                                 fips == '08013' ~ 'Boulder',
                                 fips == '08031' ~ 'Denver',
                                 fips == '08035' ~ 'Douglas',
                                 fips == '08041' ~ 'El Paso',
                                 fips == '08059' ~ 'Jefferson',
                                 fips == '08069' ~ 'Larimer',
                                 fips == '08123' ~ 'Weld'),
         day = as.factor(weekdays(date)), # create day of week based on var
         weekend = ifelse(day %in% c("Saturday", "Sunday"), 1, 0),
         month = as.factor(lubridate::month(date)), # extract month as factor
         year = as.factor(lubridate::year(date)), # extract year as factor
         season = as.factor(case_when(month %in% c(12, 1, 2) ~ "winter",
                                      month %in% c(3:5) ~ "spring",
                                      month %in% c(6:8) ~ "summer",
                                      month %in% c(9:11)~ "fall")),
         # creating binary smoke to require 50% of county with smoke overhead
         # and difference between estimate and seasonal background to be >0
         # and to be within the month of April to Octoboer
         smoke0_hms = ifelse(pm_diff > 0 & month %in% c(5:10) & hms > 0.5,1,0),
         smoke5_hms = ifelse(pm_diff > 5 & month %in% c(5:10) & hms > 0.1,1,0),
         smoke10_hms = ifelse(pm_diff > 10 & month %in% c(5:10) & hms > 0.1,1,0),
         smoke15_hms = ifelse(pm_diff > 15 & month %in% c(5:10) & hms > 0.1,1,0),
         # transforming pm kriged estimates to a 10 unit increase in pm
         pm = pm_krig/10, 
         # continuous pm smoke accounting for hms
         cpm_smk = ifelse(hms > 0.1 & month %in% c(5:10), pm_diff, 0),
         cpm_smk = ifelse(cpm_smk < 0, 0, cpm_smk),
         aqi_warning = ifelse(aqi_cat %in% 
            c('Unhealthy', 'Unhealthy for Sensitive Groups'), 1, 0)) %>%  
  # sorting by fips and date to estimate lag for each county by date
  arrange(fips, date) %>% 
  # group by fips 
  group_by(fips) %>%
  # apply funlag to create lagged estimates
  mutate(., !!!funlag(pm,5), !!!funlag(smoke0_hms,5), !!!funlag(smoke5_hms,5),
         !!!funlag(smoke10_hms,5), !!!funlag(smoke15_hms,5), 
         !!!funlag(temp_f, 5)) %>% 
  dplyr::select(-state, -month) # removing state and month to bind with casecross

Front Range Grid Estimates of Smoke PM2.5 2010 to 2015

To try and estimate PM2.5 from smoke vs. other sources, I’ve taken the estimated kriged value of PM2.5 and subtracted off the seasonal background of PM2.5 and only consider this difference to be from smoke if there is smoke in the atmospheric column using the HMS grids. For county population-weighted values, I use a rather loose definition of at least 10% of the county with smoke overhead. The grid-level estimate of smoke is probably more reliable. I’ll show some quick diagnostic plots below.

Here is a plot of the population-weighted smoke PM2.5 for each Front Range county.

pm_plot <- ggplot(data=co_pm, aes(x=date, y=cpm_smk)) +
  geom_point(size = 0.5, color = "#6441a5") +
  facet_wrap(~county_name) +
  xlab('Date') +
  ylab('County Smoke PM2.5 ug/m^3') +
  theme_minimal()
# plot
print(pm_plot)

This definition of smoke seems reasonable. I do have some concerns with the Kriged model in that there isn’t enough stations to capture spatial variations. For example, the 2012 Waldo Canyon Fire likely affected south Front Range counties like El Paso a little after the High Park Fire likely affected northern front range counties like Larmier. Does the county population-weighted time series reflect this? All the smoke peaks across look uniform across Front Range counties. This could be explained by the kriging process if certain sites are really driving the smoothed surfaces.

Maps of the study area and specific fires may help understand the exposure series. I will work on these after some input.

# code chunk that reads in grid pm2.5 and prepares some lagged variables
# for joining with health outcomes data.

# read new grid/wrfgrid key
grid_key <- read_csv('../../data/shapefiles/wrfgrid_key.csv', 
                     col_types = list(GRID_ID = col_character(),
                                      WRFGRID_ID = col_character())) %>% 
  dplyr::select(-COLX, -ROWY)

# read grid pm
grid_pm <- read_csv('../../data/smoke/1015-grid_pm.csv', 
                    col_types = list(GRID_ID = col_character())) %>% 
  # create some variables         
  mutate(pm_diff_g = pm25_grid - sbg_pm_grid,
         month = as.factor(lubridate::month(date)), # extract month as factor
         gsmk5_hms = ifelse(pm_diff_g > 5 & month %in% c(4:10) & hms_grid == 1,
                            1,0),
         gsmk10_hms = ifelse(pm_diff_g > 10 & month %in% c(4:10) & hms_grid == 1,
                             1,0),
         gsmk15_hms = ifelse(pm_diff_g > 15 & month %in% c(4:10) & hms_grid == 1,
                             1,0),
         # continuous pm smoke accounting for hms
         gpm_smk = ifelse(hms_grid == 1 & month %in% c(5:10), pm_diff_g, 0),
         gpm_smk = ifelse(gpm_smk < 0, 0, gpm_smk),
         # transforming pm smke estimates to a 10 unit increase in pm
         gpm_smk10unit = gpm_smk/10) %>% 
         # sorting by GRID_ID and date to estimate lag for each county by date
         arrange(GRID_ID, date) %>% 
         # group by GRID_ID 
         group_by(GRID_ID) %>%
         # apply funlag to create lagged estimates
         mutate(., !!!funlag(gpm_smk10unit,5), !!!funlag(gsmk5_hms,5),
               !!!funlag(gsmk10_hms,5), !!!funlag(gsmk15_hms,5),
               !!!funlag(temp_f_grid, 5)) %>% 
         dplyr::select(-month) %>% 
         # filter to grids in study area
         filter(GRID_ID %in% frontrange_grid$GRID_ID) %>% 
         # join county id
         left_join(dplyr::select(grid_county_final, GRID_ID, NAME, fips), 
                   by = 'GRID_ID')

Grid smoke values in each front range county.

# front range fips
fr_fips <- c("08001", "08005", "08013", "08031", "08035", "08041",
             "08059", "08069", "08123")

pm_plot <- ggplot(data=filter(grid_pm, fips %in% fr_fips), 
                  aes(x=date, y=gpm_smk, group = GRID_ID)) +
  geom_point(size = 0.5, color = "#6441a5") +
  facet_wrap(~NAME) +
  xlab('Date') +
  ylab(expression(paste("Smoke PM"[2.5]," in ", mu,"g/m"^3))) +
  theme_minimal()
# plot
print(pm_plot)

Summary statistics of PM2.5 in grid.

grid_pm %>%
  ungroup() %>% 
  summarize(mean_pm25 = mean(pm25_grid), max_pm25 = max(pm25_grid),
            min_pm25 = 0)

Wildfire season summary stats.

grid_pm %>%
  mutate(month = lubridate::month(date)) %>% 
  filter(month %in% 5:10) %>% 
  ungroup() %>% 
  summarize(mean_pm25 = mean(pm25_grid), max_pm25 = max(pm25_grid),
            min_pm25 = 0, mean_smk = mean(gpm_smk), max_smk = max(gpm_smk))

Reading ozone estimates.

ozone <- read_csv('../../data/smoke/1015-frontrange_kriged_o3.csv') %>% 
  mutate(GRID_ID = as.character(GRID_ID)) %>% 
  # sorting by GRID_ID and date to estimate lag for each county by date
  arrange(GRID_ID, date) %>% 
  # group by GRID_ID 
  group_by(GRID_ID) %>%
  # apply funlag to create lagged estimates
  mutate(., !!!funlag(o3_8hr_max_ppb,5))

Hospitalizations and Mortality

I’ve created time-stratified case-crossover data frames for both mortality and hospitalization data provided by CDPHE with reference periods within a month of the observation. The justification for a ‘month’ period is appropriate for mortality I think, as I think it’s reasonable counter factual window in when a person could have died. A longer period like we’ve previously used like the entire wildfire season may not be as appropriate for mortality for a person who is already at risk of death.

Hospitalizations

# load casecross list
load("../../data/health/1015-co_morbidity_casecross_list.RData")
# load icd9
load("../../data/health/icd9_outcome_vectors.RData")

Cleaning hospitalization data and joining with both grid-level and count-level PM2.5 data. I consider the following inpatient hospitalization events with admitting source via the emergency room or urgent care (as a way to get at acute events): all respiratory, asthma, chronic obstructive pulmonary disease (COPD), acute bronchitis, pneumonia, all cardiovascular events, arrhythmia, cerebrovascular, heart failure, ischemic heart disease, and myocardial infraction (which is a subcategory of IHD).

Some notes for the Colorado hospitalization data is that unlike Washington and Oregon, I have no way of identifying multiple events per person so I am not able to limit to first event and I am assuming each event is independent of others. Also, unlike Washington and Oregon, I’ve decided to go with monthly referent periods rather than the whole wildfire season. Possible suggestion would be to run sensitivity analyses to see if this has an impact.

# extract names
outcome <- c('All Respiratory', 'Asthma', 'COPD', 'Acute Bronchitis',
              'Pneumonia', 'All CVD', 'Arrhythmia', 'Cerebrovascular',
              'Heart Failure', 'Ischemic Heart Disease', 
              'Myocardial Infarction')

# reduce case-crossover list to only summer months and join GRID ID
co_hosp_list <- co_morbidity_cc_list %>% 
  # desired format to make sure it's right
  map(~ mutate(., outcome = as.numeric(as.character(outcome)),
               date = as.Date(as.character(date)),
               month = as.factor(lubridate::month(date))) %>% 
      # filter out 2016; I don't have pm data yet
      filter(date <= "2015-12-30") %>% 
      filter(month %in% 5:10) %>% 
      filter(fips %in% fr_fips) %>% 
      dplyr::select(-state, -month) %>% 
      # join with grid key
      left_join(grid_key, by = 'WRFGRID_ID') %>% 
      left_join(co_pm, by = c("fips", "date")) %>% 
      # left join grid pm
      left_join(grid_pm, by = c("GRID_ID", "date")) %>% 
      left_join(ozone, by = c("GRID_ID", "date")) %>% 
      # filter to front range grid ids
      filter(GRID_ID %in% frontrange_grid$GRID_ID)) %>% 
  # add outcome name to each dataframe
  map2(.x = ., .y = outcome, ~mutate(.x, out_name = .y))

Colorado Hospitalization Respiratory and Cardiovascular Time Series

Creating and plotting the time series of respiratory and cardiovascular events for the state of Colorado.

# create time series of hospitalizations
ts_hosp <- co_morbidity_cc_list[c(1,6)] %>% 
  # using map 2 to add outcome names to each dataframe
  map2(., names(co_morbidity_cc_list[c(1,6)]), function(df, name){
    counts <- df %>% 
      filter(outcome == 1) %>% 
      group_by(date) %>% 
      summarise(n = n()) %>% 
      mutate(outcome = name,
             date = as.Date(date))
    }) %>% 
  # bind dataframes
  map_dfr(.,rbind)
ts_hosp_plot <- ggplot(data=ts_hosp, aes(x=date, y=n)) +
  geom_point() +
  facet_wrap(~outcome) +
  theme_minimal()

ts_hosp_plot

The cardiovascular hospitalization is pretty uniform, with some noticeable dips at the end of the year of 2013 and when ICD9 switches to ICD10 at the end of the time series. I’d say this is probably a systemic thing that has to do with coding or how hospitals recorded events rather than an actual big drop in the rate of CVD hospitalizations. Although may Kirk or some others at CDPHE may know.

Counts of Hospitalization Events

Number of inpatient hospitalization events that occurred in Colorado Front Range Communities between 2010 and 2015 in the May to October Months. Also note that October of 2015 is when ICD9 was switched to ICD10, so I do not have that last month.

strata <- c('All', 'Sex', 'Age')
# estimate hospitalizations
hosp_counts <- co_hosp_list %>% 
  map_dfr(.,function(df){
    map_dfr(strata, function(x){
    if(x == 'All'){
      counts <- df %>% 
        filter(outcome == 1) %>% 
        group_by(out_name) %>% 
        summarise(n_events = n()) %>% 
        mutate(strata = x) %>% 
        dplyr::select(out_name, strata, n_events)
    } else if (x == 'Sex'){
      counts <- df %>% 
        filter(outcome == 1) %>% 
        group_by(out_name, sex) %>% 
        summarise(n_events = n()) %>% 
        rename(strata = sex) %>% 
        dplyr::select(out_name, strata, n_events)
    } else {
      counts <- df %>% 
        filter(outcome == 1) %>% 
        group_by(out_name, age_cat) %>% 
        summarise(n_events = n()) %>% 
        rename(strata = age_cat) %>% 
        dplyr::select(out_name, strata, n_events)
    }
    }) # end strata map
  }) %>% # end outcome list map
  spread(strata, n_events) %>% 
  dplyr::select(out_name, All, F, M, age_under_15, age_15_to_65, age_over_65) %>% 
  mutate_at(vars(F:age_over_65), funs(round((./All)*100,1)))

# print death counts
knitr::kable(hosp_counts, caption = 'Number of Inpatient Events')
Number of Inpatient Events
out_name All F M age_under_15 age_15_to_65 age_over_65
Acute Bronchitis 1884 41.9 58.1 65.3 20.1 14.5
All CVD 68356 47.8 52.2 0.3 40.2 59.5
All Respiratory 46585 50.8 49.2 7.9 44.9 47.2
Arrhythmia 9958 48.8 51.2 0.4 31.9 67.6
Asthma 6816 53.0 47.0 11.5 65.8 22.6
Cerebrovascular 10484 34.4 65.6 0.1 48.3 51.6
COPD 6656 53.6 46.4 0.2 37.3 62.5
Heart Failure 10090 33.7 66.3 0.1 48.2 51.7
Ischemic Heart Disease 10930 50.2 49.8 0.4 27.5 72.1
Myocardial Infarction 13660 53.6 46.4 0.3 33.7 65.9
Pneumonia 14694 51.2 48.8 5.4 40.7 53.9

Assessment of Assigned PM2.5

Quick assessment of how well my smoke PM2.5 definition at the grid level correlates at the county level for front range counties. I’ve used the cardiovascular hospitalizations (as they are most common) to plot the assigned county smoke PM2.5 vs grid smoke PM2.5 It looks like the values are similar in some respect, and sometimes not. I would assume the grid level is more accurate since my definition requires the grid be completely impacted by smoke based on the HMS plume.

ggplot(data = filter(co_hosp_list[[6]], !is.na(gpm_smk)), 
                     aes(x=cpm_smk, y =gpm_smk)) +
  geom_point() +
  ylab('Grid Smoke PM2.5') +
  xlab('County Smoke PM2.5') +
  ggtitle('Comparison of County vs Grid Smoke PM2.5') +
  theme_minimal()

Check how many events have a county PM assigned but not a grid PM assigned.

missing <- co_hosp_list[[6]] %>% 
  filter(outcome == 1) %>% 
  mutate(county_miss = ifelse(is.na(pm_krig), 1, 0),
         grid_miss = ifelse(is.na(pm25_grid), 1, 0)) %>% 
  group_by(county_miss, grid_miss) %>% 
  summarize(n = n()) %>% 
  mutate(prop = round(n/(69064+8716), 2))

knitr::kable(missing, caption = 'CVD Hospitalization Proportion Missing Grid PM2.5')
CVD Hospitalization Proportion Missing Grid PM2.5
county_miss grid_miss n prop
0 0 68356 0.88

Roughly 10% of subjects with a CVD hospitalization are missing grid PM2.5 which I’m guessing is because Kirk was not able to Geocode for what ever reason.

Same Day Association between Smoke PM2.5 and Inpatient Hospitalizations

First things first is to look at the same day association between an increase in smoke PM2.5 and the risk for an inpatient hospitalization.

# extract names
outcome <- c('All Respiratory', 'Asthma', 'COPD', 'Acute Bronchitis',
              'Pneumonia', 'All CVD', 'Arrhythmia', 'Cerebrovascular',
              'Heart Failure', 'Ischemic Heart Disease', 
              'Myocardial Infarction')
# outcome order
out_order <- c('All Respiratory', 'Asthma', 'COPD', 'Acute Bronchitis',
              'Pneumonia', 'All CVD', 'Arrhythmia', 'Cerebrovascular',
              'Heart Failure', 'Ischemic Heart Disease', 
              'Myocardial Infarction')

# same day results
same_day <- co_hosp_list %>% 
  map_dfr(., function(df){
    out_name <- unique(df$out_name)
    result <- broom::tidy(clogit(outcome ~ gpm_smk10unit + o3_8hr_max_ppb +
                                 temp_f_grid + strata(id),
                                 data = df)) %>% 
      filter(term == 'gpm_smk10unit') %>% 
      dplyr::select(term, estimate, conf.low, conf.high) %>% 
      mutate_at(vars(estimate:conf.high), exp)
  }) %>%
  bind_cols(data.frame(outcome), .) %>% 
  mutate(outcome = forcats::fct_relevel(outcome, out_order),
         group = as.factor(if_else(outcome %in% out_order[1:5],
         "Respiratory", "Cardiovascular")))

Same-day plot.

# plot
plot <- ggplot(data=same_day, aes(x=outcome, y = estimate, colour = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=conf.low, ymax=conf.high), width = 0.3) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: Smoke PM"[2.5], " > 10 ", mu, "g/m"^3))) +
  xlab("Outcome") +
  theme_bw() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75))

print(plot)

There is no association for most outcomes. Notable observation would be the inverse association between increasing smoke and a lower likelihood of a respiratory hospitalization.

Here is a table for the point estimated and 95%CIs from the plot above. The gpm_smk10unit stands for grid PM2.5 smoke estimate, 10 unit increase.

knitr::kable(dplyr::select(same_day, outcome, estimate:conf.high) %>%
    mutate_at(vars(estimate:conf.high), ~round(.x, 3)), 
  caption = 'Same Day Association with Smoke and Cardiopulmonary Hospitalizations')
Same Day Association with Smoke and Cardiopulmonary Hospitalizations
outcome estimate conf.low conf.high
All Respiratory 0.950 0.901 1.003
Asthma 0.989 0.854 1.145
COPD 0.912 0.791 1.050
Acute Bronchitis 1.141 0.872 1.495
Pneumonia 1.028 0.935 1.130
All CVD 0.988 0.948 1.029
Arrhythmia 1.036 0.935 1.149
Cerebrovascular 1.048 0.949 1.158
Heart Failure 1.055 0.954 1.167
Ischemic Heart Disease 0.972 0.877 1.079
Myocardial Infarction 0.980 0.895 1.074

Cumulative effect for a 10 ug/m^3 of smoke exposure over 5 days

distribute_that_lag <- function(lag_mod, strata, exp_b) {
  # output pm basis estimates
  parms <- broom::tidy(lag_mod) %>% 
    filter(stringr::str_detect(term, strata)) %>% 
    dplyr::select(estimate) %>% 
    as_vector()
  # output estimate names for cov matrix
  names <- stringr::str_subset(names(lag_mod$coefficients), strata)
  
  # estimate associations
  est <- exp_b %*% parms
  # estimate standard error for each interval
  # time variable
  time <- ((rep(1:length(est))-1))
  # covariance matrix for knots 
  cov_mat <- as.matrix(vcov(lag_mod))[names, names]
  # estimate variance of spline
  var <- exp_b %*% cov_mat %*% t(exp_b)
  # estimate lag ----
  # estimate standard error for each lag day for smoke
  l_se <- sqrt(diag(var))
  # calculate lower and upper bound for smoke
  l_est_l95 <- est + (l_se*qnorm(1-0.975))
  l_est_u95 <- est + (l_se*qnorm(0.975))
  l_type <- "lag"
  # lag dataframe
  l_df <- data.frame(strata, l_type, time, 
                     exp(est), exp(l_est_l95), exp(l_est_u95), 
                     row.names = NULL) 
  # assign column names
  colnames(l_df) <- c("strata", "type", "time", 
                      "odds_ratio", "lower_95", "upper_95")
  # cumulative estimates
  c_est <- sapply(seq_along(est), function(x){
    sum(est[1:x])
  })
  # stderr cumulative effect smk
  c_se <- sapply(seq_along(c_est), function(y){
    sqrt(sum(var[1:y,1:y]))
  })
  # estimate 95% CI
  c_l95 <- c_est+(c_se*qnorm(1-0.975))
  c_u95 <- c_est+(c_se*qnorm(0.975))
  # type
  c_type <- "cumulative"
  # return dataframe
  c_df <- data.frame(strata, c_type, time, exp(c_est), 
                     exp(c_l95), exp(c_u95), row.names = NULL) 
  # assign column names
  colnames(c_df) <- c("strata", "type", "time", 
                      "odds_ratio", "lower_95", "upper_95")
  # bind lagged and cumulative 
  lag_est <- rbind(l_df, c_df) %>% 
    mutate(strata = as.character(strata),
           type = as.character(type))
  # return lagged estimate
  return(lag_est)
} # end lag estimate function

Contstrained distributed lag results.

# estimating lagged effects
out_rep <- rep(outcome, each = 12)

dl_results  <- lapply(co_hosp_list, function(x){
  # output dataframe from list
  data <- x %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3,
                             gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3,
                               temp_f_grid_lag4, temp_f_grid_lag5))
  
  # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
                             o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + o3_basis + temp_basis + strata(id), 
                    data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b = exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(out_rep), .) 
# get the cumulative effect over 6 days
cumulative_results <- filter(dl_results, type == 'cumulative' & time == 5) %>% 
    dplyr::select(-strata, -time) %>% 
    mutate(outcome = forcats::fct_relevel(outcome, out_order),
         group = as.factor(if_else(outcome %in% out_order[1:5],
         "Respiratory", "Cardiovascular")))

Cumulative effect of 0 to 5 days of smoke exposure for a 10 ug/m^3.

# plot
plot <- ggplot(data = cumulative_results, 
               aes(x=outcome, y = odds_ratio, color = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Outcome") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75,
                                   size = 10))

print(plot)

There may be an association with hospitalizations for asthma and certain cardiovascular events (cerebrovascular, heart failure, IHD) for every 10 ug/m^3 of smoke PM2.5 over the course of 6 days of smoke exposure.

Table of cumulative effects results for a 10 ug/m^3 increase in smoke PM2.5

knitr::kable(
  dplyr::select(cumulative_results, outcome, type:upper_95) %>%
    mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)), 
  caption = 'Cumulative effect of smoke on Cardiopulmonary Hospitalizations')
Cumulative effect of smoke on Cardiopulmonary Hospitalizations
outcome type odds_ratio lower_95 upper_95
All Respiratory cumulative 1.029 0.949 1.116
Asthma cumulative 1.284 1.040 1.584
COPD cumulative 1.053 0.849 1.306
Acute Bronchitis cumulative 1.422 0.927 2.181
Pneumonia cumulative 1.035 0.894 1.198
All CVD cumulative 1.039 0.977 1.106
Arrhythmia cumulative 1.085 0.922 1.278
Cerebrovascular cumulative 1.183 1.013 1.381
Heart Failure cumulative 1.195 1.021 1.399
Ischemic Heart Disease cumulative 1.052 0.901 1.228
Myocardial Infarction cumulative 0.981 0.854 1.127

Constrained distributed lag effects for a 10 ug/m^3 increase in smoke

Creating a plot showing the individual lagged day effects on cardiopulmonary hospitalizations. This is a constrained distributed lag model, meaning all lagged days are accounted for in the model.

constrained_lag_results <- dl_results %>% 
  mutate(outcome = forcats::fct_relevel(out_rep, out_order),
         group = as.factor(if_else(outcome %in% out_order[1:5],
         "Respiratory", "Cardiovascular"))) %>% 
  filter(type == 'lag')

# constrained distributed lag
dl_hosp_plot <- ggplot(data=constrained_lag_results, 
    aes(x=time, y=odds_ratio, group = group, color = group, fill = group)) +
  geom_line(size = 1) +
  geom_ribbon(aes(ymin = lower_95, ymax = upper_95), color = 'transparent',
              alpha = 0.5) +
  facet_wrap(~outcome, scales = 'free_y', ncol = 4) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  scale_fill_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Lagged Days") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        legend.direction = 'horizontal',
        legend.position = 'bottom',
        panel.grid.minor = element_blank(),
        strip.background = element_blank())

print(dl_hosp_plot)

For all respiratory and asthma, there appears to be a inverse association (or trending towards an inverse association) on the same day of exposure, with a steady increase in the risk on day 3. I think the acute bronchitis and pneumonia signals are noisy and the day 3 association is not likely real (suggesting that maybe my model decision is not great). It could be that I need to further reduce the lagged days evaluated to maybe 0 to 2 since the smoke events are pretty brief.

knitr::kable(
  dplyr::select(constrained_lag_results, outcome, time:upper_95) %>% 
    mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
  caption = 'Distributed Lag Daily Associations with Smoke and Cardiopulmonary Hospitalizations')
Distributed Lag Daily Associations with Smoke and Cardiopulmonary Hospitalizations
outcome time odds_ratio lower_95 upper_95
All Respiratory 0 0.918 0.875 0.962
All Respiratory 1 0.985 0.964 1.007
All Respiratory 2 1.038 1.009 1.069
All Respiratory 3 1.057 1.026 1.088
All Respiratory 4 1.037 1.016 1.059
All Respiratory 5 1.000 0.955 1.047
Asthma 0 0.913 0.802 1.040
Asthma 1 1.016 0.961 1.075
Asthma 2 1.099 1.018 1.187
Asthma 3 1.125 1.042 1.215
Asthma 4 1.090 1.032 1.150
Asthma 5 1.026 0.906 1.162
COPD 0 0.891 0.786 1.010
COPD 1 0.983 0.928 1.041
COPD 2 1.057 0.979 1.142
COPD 3 1.082 1.002 1.168
COPD 4 1.053 0.996 1.113
COPD 5 0.999 0.884 1.130
Acute Bronchitis 0 1.000 0.781 1.281
Acute Bronchitis 1 1.009 0.908 1.122
Acute Bronchitis 2 1.026 0.880 1.195
Acute Bronchitis 3 1.058 0.906 1.237
Acute Bronchitis 4 1.109 0.992 1.239
Acute Bronchitis 5 1.170 0.917 1.493
Pneumonia 0 0.981 0.902 1.067
Pneumonia 1 1.016 0.977 1.057
Pneumonia 2 1.039 0.986 1.095
Pneumonia 3 1.035 0.982 1.090
Pneumonia 4 1.004 0.967 1.043
Pneumonia 5 0.961 0.884 1.045
All CVD 0 0.984 0.949 1.020
All CVD 1 1.001 0.985 1.018
All CVD 2 1.014 0.992 1.037
All CVD 3 1.019 0.996 1.042
All CVD 4 1.015 0.999 1.031
All CVD 5 1.007 0.971 1.043
Arrhythmia 0 1.058 0.965 1.160
Arrhythmia 1 0.988 0.946 1.032
Arrhythmia 2 0.948 0.893 1.006
Arrhythmia 3 0.958 0.904 1.016
Arrhythmia 4 1.022 0.979 1.066
Arrhythmia 5 1.119 1.019 1.227
Cerebrovascular 0 1.035 0.948 1.131
Cerebrovascular 1 1.026 0.985 1.068
Cerebrovascular 2 1.020 0.964 1.078
Cerebrovascular 3 1.021 0.965 1.079
Cerebrovascular 4 1.029 0.987 1.072
Cerebrovascular 5 1.040 0.951 1.138
Heart Failure 0 1.038 0.949 1.136
Heart Failure 1 1.026 0.985 1.070
Heart Failure 2 1.019 0.963 1.079
Heart Failure 3 1.021 0.964 1.080
Heart Failure 4 1.031 0.989 1.075
Heart Failure 5 1.046 0.955 1.146
Ischemic Heart Disease 0 0.921 0.841 1.010
Ischemic Heart Disease 1 1.023 0.983 1.066
Ischemic Heart Disease 2 1.096 1.038 1.158
Ischemic Heart Disease 3 1.094 1.036 1.156
Ischemic Heart Disease 4 1.018 0.978 1.060
Ischemic Heart Disease 5 0.913 0.834 1.000
Myocardial Infarction 0 0.950 0.876 1.031
Myocardial Infarction 1 0.999 0.963 1.037
Myocardial Infarction 2 1.034 0.983 1.087
Myocardial Infarction 3 1.036 0.985 1.089
Myocardial Infarction 4 1.005 0.970 1.042
Myocardial Infarction 5 0.959 0.885 1.040

Unconstrained Lag Model for Hospitalizations

Running an unconstrained lag model where I model the cooresponding lagged day of smoke, adjusting for the same lag day of temperature and ozone. This model is for comparison with our 2012 Washington paper and to see how sensitive our distributed lag models are.

# repeat outcomes 4 times
out_rep <- rep(outcome, each = 6)
# day to repeat
day <- rep(0:5, times = 11)

# lag day vector
lag_days <- c('', '_lag1', '_lag2', '_lag3', '_lag4', '_lag5')

# map across lagged days
hosp_uc_lag_results <- map_dfr(co_hosp_list, function(df){
  data <- df
    results <- map_dfr(lag_days, function(x){
      # define function
      f <- as.formula(paste('outcome~',
        paste(paste0(c('gpm_smk10unit', 'o3_8hr_max_ppb', 'temp_f_grid'), 
                     x), collapse = '+'),
        '+strata(id)'))
      r <- broom::tidy(clogit(f, data = data)) %>% 
      filter(term == paste0('gpm_smk10unit',x)) %>% 
      dplyr::select(term, estimate, conf.low, conf.high) %>% 
      mutate_at(vars(estimate:conf.high), exp)
    })
  }) %>% 
  cbind(out_rep, day, .) %>% 
  mutate(outcome = forcats::fct_relevel(out_rep, out_order),
         group = as.factor(if_else(outcome %in% out_order[1:5],
         "Respiratory", "Cardiovascular"))) 

Plot of unconstrained lag results for hospitalizations.

uc_plot <- ggplot(data=hosp_uc_lag_results, aes(x=day, y=estimate, 
                                                group = group, color = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=conf.low, ymax=conf.high), width = 0.3) +
  facet_wrap(~outcome, scales = 'free_y', ncol = 4) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Lagged Days") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        legend.direction = 'horizontal',
        legend.position = 'bottom',
        panel.grid.minor = element_blank(),
        strip.background = element_blank())

print(uc_plot)

In the context of the distributed lag models, this may help confirm some of the associations. I think the respiratory and asthma trends are supported by these plots. Same with ischemic heart disease.

Unconstrained lag associations table.

knitr::kable(dplyr::select(hosp_uc_lag_results, outcome, day, estimate:conf.high) %>%
    mutate_at(vars(estimate:conf.high), ~round(.x, 3)),
  caption = 'Unconstrained Lag Daily Associations with Smoke and Cardiopulmonary Hospitalizations')
Unconstrained Lag Daily Associations with Smoke and Cardiopulmonary Hospitalizations
outcome day estimate conf.low conf.high
All Respiratory 0 0.950 0.901 1.003
All Respiratory 1 0.993 0.943 1.046
All Respiratory 2 1.040 0.988 1.094
All Respiratory 3 1.106 1.053 1.162
All Respiratory 4 1.060 1.007 1.116
All Respiratory 5 1.036 0.984 1.090
Asthma 0 0.989 0.854 1.145
Asthma 1 1.101 0.968 1.251
Asthma 2 1.197 1.049 1.366
Asthma 3 1.259 1.109 1.430
Asthma 4 1.146 1.000 1.313
Asthma 5 1.185 1.035 1.356
COPD 0 0.912 0.791 1.050
COPD 1 1.069 0.935 1.222
COPD 2 1.005 0.880 1.148
COPD 3 1.148 1.004 1.312
COPD 4 1.137 0.997 1.297
COPD 5 1.031 0.898 1.183
Acute Bronchitis 0 1.141 0.872 1.495
Acute Bronchitis 1 0.979 0.741 1.293
Acute Bronchitis 2 1.099 0.837 1.442
Acute Bronchitis 3 1.374 1.049 1.801
Acute Bronchitis 4 1.328 1.004 1.757
Acute Bronchitis 5 1.201 0.909 1.588
Pneumonia 0 1.028 0.935 1.130
Pneumonia 1 1.018 0.928 1.117
Pneumonia 2 1.046 0.952 1.149
Pneumonia 3 1.084 0.992 1.185
Pneumonia 4 1.020 0.931 1.118
Pneumonia 5 0.971 0.885 1.066
All CVD 0 0.988 0.948 1.029
All CVD 1 1.050 1.010 1.091
All CVD 2 1.015 0.975 1.056
All CVD 3 1.029 0.991 1.069
All CVD 4 1.038 0.997 1.080
All CVD 5 1.019 0.980 1.060
Arrhythmia 0 1.036 0.935 1.149
Arrhythmia 1 1.020 0.920 1.132
Arrhythmia 2 0.954 0.859 1.059
Arrhythmia 3 0.984 0.889 1.089
Arrhythmia 4 1.094 0.983 1.219
Arrhythmia 5 1.078 0.972 1.197
Cerebrovascular 0 1.048 0.949 1.158
Cerebrovascular 1 1.122 1.020 1.235
Cerebrovascular 2 1.050 0.953 1.157
Cerebrovascular 3 1.078 0.978 1.189
Cerebrovascular 4 1.073 0.969 1.189
Cerebrovascular 5 1.077 0.975 1.189
Heart Failure 0 1.055 0.954 1.167
Heart Failure 1 1.122 1.018 1.237
Heart Failure 2 1.050 0.952 1.159
Heart Failure 3 1.090 0.987 1.203
Heart Failure 4 1.078 0.972 1.196
Heart Failure 5 1.082 0.978 1.197
Ischemic Heart Disease 0 0.972 0.877 1.079
Ischemic Heart Disease 1 1.104 1.007 1.210
Ischemic Heart Disease 2 1.101 1.000 1.213
Ischemic Heart Disease 3 1.124 1.025 1.232
Ischemic Heart Disease 4 1.056 0.959 1.163
Ischemic Heart Disease 5 0.961 0.868 1.064
Myocardial Infarction 0 0.980 0.895 1.074
Myocardial Infarction 1 1.002 0.918 1.094
Myocardial Infarction 2 1.018 0.932 1.112
Myocardial Infarction 3 1.069 0.984 1.160
Myocardial Infarction 4 0.965 0.881 1.057
Myocardial Infarction 5 0.993 0.908 1.085

Sex-Specific Stratification

Using the distributed lag model for each sex strata. Also modifying code to pull out n cases analyzed.

# estimating lagged effects
out_rep <- rep(outcome, each = 24)

sex_strata <- c('F', 'M')

sex_results  <- map_dfr(co_hosp_list, function(df){
  results <- map_dfr(sex_strata, function(x){
   data <- df %>% 
     filter(sex == x) %>% 
     mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

    # create lagged matrix
    pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3, 
                             gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
    # temp matrix
    temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                                 temp_f_grid_lag2, temp_f_grid_lag3,
                                 temp_f_grid_lag4, temp_f_grid_lag5))
    
    # ozone matrix
    o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                               o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3, 
                               o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
    
    # define lagged basis spline
    exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
    # pm basis
    pm_basis <- pm_mat %*% exp_b
    # temp basis
    temp_basis <- temp_mat %*% exp_b
    # ozone basis
    o3_basis <- o3_mat %*% exp_b
    # run lagged model
    lag_mod <- clogit(outcome ~ pm_basis + o3_basis + temp_basis + strata(id), 
                      data = data)
  
    # estimate lag estimate
    lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) %>% 
      mutate(sex = x)
    return(lag_est)
  }) 
}) %>% 
  # bind in outcome names
  bind_cols(data.frame(out_rep), .) 

Sex-specific plots of the cumulative effect.

# subset to cumulative results
cumulative_sex_strata <- sex_results %>% 
  filter(type == 'cumulative' & time == 5) %>% 
  mutate(sex_long = if_else(sex == 'F', 'Female', 'Male'),
         outcome = forcats::fct_relevel(out_rep, out_order),
         group = as.factor(if_else(outcome %in% out_order[1:5],
         "Respiratory", "Cardiovascular"))) 

# plot
sex_plot <- ggplot(data=cumulative_sex_strata, aes(x=sex_long, y=odds_ratio, 
                                        group = group, color = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
  facet_wrap(~outcome, scales = 'free_y', ncol = 4) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Sex") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        legend.direction = 'horizontal',
        legend.position = 'bottom',
        panel.grid.minor = element_blank(),
        strip.background = element_blank())

print(sex_plot)

Sex-specific estimates table.

knitr::kable(dplyr::select(cumulative_sex_strata, outcome, sex_long, odds_ratio:upper_95) %>%
    mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
             caption = 'Sex-specific cumulative effect of smoke on hosptializations.')
Sex-specific cumulative effect of smoke on hosptializations.
outcome sex_long odds_ratio lower_95 upper_95
All Respiratory Female 1.005 0.897 1.125
All Respiratory Male 1.055 0.939 1.185
Asthma Female 1.268 0.945 1.701
Asthma Male 1.292 0.954 1.749
COPD Female 1.008 0.743 1.369
COPD Male 1.095 0.807 1.487
Acute Bronchitis Female 1.190 0.629 2.250
Acute Bronchitis Male 1.683 0.937 3.022
Pneumonia Female 1.005 0.820 1.232
Pneumonia Male 1.066 0.863 1.316
All CVD Female 1.041 0.951 1.139
All CVD Male 1.037 0.952 1.130
Arrhythmia Female 1.229 0.980 1.541
Arrhythmia Male 0.950 0.751 1.203
Cerebrovascular Female 1.188 0.906 1.557
Cerebrovascular Male 1.178 0.975 1.423
Heart Failure Female 1.215 0.919 1.605
Heart Failure Male 1.185 0.979 1.434
Ischemic Heart Disease Female 1.071 0.859 1.334
Ischemic Heart Disease Male 1.035 0.833 1.287
Myocardial Infarction Female 1.060 0.881 1.275
Myocardial Infarction Male 0.883 0.715 1.090

Age-Specific Stratification

Age category stratified results.

# estimating lagged effects
out_rep <- rep(outcome, each = 36)

age_strata <- c('age_under_15', 'age_15_to_65', 'age_over_65')

age_results  <- map_dfr(co_hosp_list, function(df){
  results <- map_dfr(age_strata, function(x){
   data <- df %>% 
     filter(age_cat == x) %>% 
     mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))
   
    # create lagged matrix
    pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3,
                             gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
    # temp matrix
    temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                                 temp_f_grid_lag2, temp_f_grid_lag3,
                                 temp_f_grid_lag4, temp_f_grid_lag5))
    
    # ozone matrix
    o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                               o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
                               o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
    
    # define lagged basis spline
    exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
    # pm basis
    pm_basis <- pm_mat %*% exp_b
    # temp basis
    temp_basis <- temp_mat %*% exp_b
    # ozone basis
    o3_basis <- o3_mat %*% exp_b
    # run lagged model
    lag_mod <- tryCatch(clogit(outcome ~ pm_basis + o3_basis + temp_basis + strata(id), 
                      data = data))
  
    # estimate lag estimate
    lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) %>% 
      mutate(age_cat = x)
    return(lag_est)
  })
}) %>% 
  # bind in outcome names
  bind_cols(data.frame(out_rep), .) 

Age-specific plots of the cumulative effect.

# subset to cumulative results
cumulative_age_strata <- age_results %>% 
  filter(type == 'cumulative' & time == 5) %>% 
  mutate(age_long = forcats::fct_relevel(
      case_when(age_cat == 'age_under_15' ~ 'Age < 15',
                age_cat == 'age_15_to_65' ~ 'Age 15 to 64',
                age_cat == 'age_over_65' ~ 'Age > 64'),
      c('Age < 15', 'Age 15 to 64', 'Age > 64')),
         outcome = forcats::fct_relevel(out_rep, out_order),
         group = as.factor(if_else(outcome %in% out_order[1:5],
         "Respiratory", "Cardiovascular"))) %>% 
  filter(upper_95 < 3.5)

# plot
age_plot <- ggplot(data=cumulative_age_strata, aes(x=age_long, y=odds_ratio, 
                                        group = group, color = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
  facet_wrap(~outcome, scales = 'free_y', ncol = 4) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Sex") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        legend.direction = 'horizontal',
        legend.position = 'bottom',
        panel.grid.minor = element_blank(),
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75))

print(age_plot)

Age-specific estimates table.

knitr::kable(dplyr::select(cumulative_age_strata, outcome, age_long, odds_ratio:upper_95) %>%
    mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
             caption = 'Age-specific cumulative effect of smoke on hosptializations.')
Age-specific cumulative effect of smoke on hosptializations.
outcome age_long odds_ratio lower_95 upper_95
All Respiratory Age < 15 1.271 0.931 1.734
All Respiratory Age 15 to 64 0.978 0.865 1.106
All Respiratory Age > 64 1.044 0.931 1.171
Asthma Age < 15 0.526 0.240 1.156
Asthma Age 15 to 64 1.338 1.035 1.731
Asthma Age > 64 1.510 0.993 2.298
COPD Age 15 to 64 1.231 0.878 1.726
COPD Age > 64 0.945 0.712 1.252
Acute Bronchitis Age 15 to 64 0.334 0.102 1.090
Acute Bronchitis Age > 64 0.626 0.161 2.436
Pneumonia Age < 15 0.803 0.368 1.752
Pneumonia Age 15 to 64 0.883 0.691 1.128
Pneumonia Age > 64 1.155 0.957 1.394
All CVD Age < 15 0.944 0.359 2.482
All CVD Age 15 to 64 1.037 0.939 1.144
All CVD Age > 64 1.042 0.962 1.128
Arrhythmia Age 15 to 64 0.858 0.632 1.165
Arrhythmia Age > 64 1.196 0.986 1.453
Cerebrovascular Age 15 to 64 1.192 0.948 1.500
Cerebrovascular Age > 64 1.173 0.950 1.447
Heart Failure Age 15 to 64 1.204 0.953 1.522
Heart Failure Age > 64 1.185 0.957 1.467
Ischemic Heart Disease Age 15 to 64 0.938 0.689 1.278
Ischemic Heart Disease Age > 64 1.093 0.913 1.309
Myocardial Infarction Age 15 to 64 1.062 0.837 1.349
Myocardial Infarction Age > 64 0.943 0.795 1.119

Mortality

Since we have mortality data, I’m also going to evaluate the association between grid-level smoke PM2.5 and cardiopulmonary mortality. I’ve created time-stratified case-crossover data frames of cardiopulmonary mortality events with referent period within the same month. I’ve also limit mortality case-crossover events to Front Range counties from May to October from 2010 to 2015.

# load casecrossover list -----
load("../../data/health/co_mortality_cc_list.RData")
# load mortality outcome list
load("../../data/health/icd10_outcome.RData")
# extract a vector of outcome names from the icd10 outcomes list 
outcomes <- names(icd10_outcomes)

# reduce case-crossover list to only summer months and join pm
co_mortality_list <- casecross_list %>% 
  # desired format to make sure it's right
  map(~ mutate(., outcome = as.numeric(as.character(outcome)),
               date = as.Date(as.character(date_of_death)),
               month = as.factor(lubridate::month(date))) %>% 
      # filter out 2016; I don't have pm data yet
      filter(date <= "2015-12-30") %>% 
      filter(fips %in% fr_fips)) %>% 
  # add outcome name to each dataframe
  map2(.x = ., .y = outcomes, ~mutate(.x, out_name = .y))

Time Series of Respiratory and Cardiovascular Deaths in Colorado

Time series of events for respiratory and cvd only. I also aggregated across front range counties for sufficient numbers. Looking for any potential time trends that may need to be accounted for.

# estimate deaths 
ts_death <- co_mortality_list %>% 
  map_dfr(. , function(df){
    counts <- df %>% 
      filter(outcome == 1) %>% 
      group_by(out_name, date) %>% 
      summarise(n = n())
    }) %>% 
  filter(out_name %in% c('resp', 'cvd'))

Mortality time-series plot.

ts_death_plot <- ggplot(data=ts_death, aes(x=date, y=n)) +
  geom_point() +
  facet_wrap(~out_name) +
  theme_minimal()

ts_death_plot

There is a general increase in cardiovascular deaths over time, which I’m guessing corresponds with the population increase of Colorado. Rates probably would show a more stable rate. Interesting observation in the respiratory related morbidity is the spike at the end of 2014, which I think is because of the bad flu season. I remember Kirk saying this.

I am going to limit mortality to May to October months in 2010 to 2015 and join with PM2.5 estimates.

# extract names
cause_death <- c('Respiratory', 'Asthma', 'COPD', 'CVD', 'Heart Failure',
                 'Cardiac Arrest', 'Ischemic Heart Disease', 
                 'Myocardial Infarction', 'Cerebrovascular')

# reduce case-crossover list to only summer months and join GRID ID
co_death_list <- co_mortality_list %>% 
  # filter out 2016; I don't have pm data yet
  map(~ filter(., date <= "2015-12-30") %>% 
      filter(month %in% 5:10) %>% 
      filter(fips %in% fr_fips) %>% 
      mutate(WRFGRID_ID = as.character(wrfgrid_id),
             # age category
             age_cat = case_when(age < 15 ~ 'age_under_15',
                                 age >= 15 & age < 65 ~ 'age_15_to_65',
                                 age >= 65 ~ 'age_over_65')) %>% 
      dplyr::select(-month, -wrfgrid_id) %>%
      # join with grid key
      left_join(grid_key, by = 'WRFGRID_ID') %>% 
      left_join(co_pm, by = c("fips", "date")) %>% 
      # left join grid pm
      left_join(grid_pm, by = c("GRID_ID", "date")) %>% 
      left_join(ozone, by = c("GRID_ID", "date"))) %>% 
  # add outcome name to each dataframe
  map2(.x = ., .y = cause_death, ~mutate(.x, out_name = .y))

Strata Counts

Number of cardiopulmonary deaths in Front Range counties during this time period.

strata <- c('All', 'Sex', 'Age')

# estimate deaths 
death_counts <- co_death_list %>% 
  map_dfr(.,function(df){
    map_dfr(strata, function(x){
    if(x == 'All'){
      counts <- df %>% 
        filter(outcome == 1) %>% 
        group_by(out_name) %>% 
        summarise(n_events = n()) %>% 
        mutate(strata = x) %>% 
        dplyr::select(out_name, strata, n_events)
    } else if(x == 'Sex'){
      counts <- df %>% 
        filter(outcome == 1) %>% 
        group_by(out_name, sex) %>% 
        summarise(n_events = n()) %>% 
        rename(strata = sex) %>% 
        dplyr::select(out_name, strata, n_events)
    } else {
      counts <- df %>% 
        filter(outcome == 1) %>% 
        group_by(out_name, age_cat) %>% 
        summarise(n_events = n()) %>% 
        rename(strata = age_cat) %>% 
        dplyr::select(out_name, strata, n_events)
    }
    }) # end strata map
  }) %>% # end outcome list map
  spread(strata, n_events) %>% 
  dplyr::select(out_name, All, F, M, age_under_15, age_15_to_65, age_over_65) %>% 
  mutate_at(vars(F:age_over_65), funs(round((./All)*100,1)))

# print death counts
knitr::kable(death_counts, caption='Front Range Cardiopulmonary Deaths and Strata Proportion')
Front Range Cardiopulmonary Deaths and Strata Proportion
out_name All F M age_under_15 age_15_to_65 age_over_65
Asthma 101 68.3 31.7 4.0 44.6 51.5
Cardiac Arrest 157 58.0 42.0 NA 18.5 81.5
Cerebrovascular 3443 60.1 39.9 0.3 14.6 85.1
COPD 4056 51.2 48.8 NA 12.4 87.6
CVD 18122 49.9 50.1 0.2 20.0 79.8
Heart Failure 1395 59.5 40.5 0.1 6.3 93.6
Ischemic Heart Disease 7520 39.9 60.1 0.0 23.2 76.8
Myocardial Infarction 1948 40.9 59.1 NA 25.4 74.6
Respiratory 7025 50.5 49.5 0.4 14.9 84.7

Assessing how many CVD deaths are missing a grid assignment but were assigned a county PM2.5. Not that many missing (~2%).

missing <- co_death_list[[4]] %>% 
  filter(outcome == 1) %>% 
  mutate(county_miss = ifelse(is.na(pm_krig), 1, 0),
         grid_miss = ifelse(is.na(pm25_grid), 1, 0)) %>% 
  group_by(county_miss, grid_miss) %>% 
  summarize(n = n()) %>% 
  mutate(prop = round(n/(17720+402),2))

knitr::kable(missing, caption = 'CVD mortality missing grid PM2.5')
CVD mortality missing grid PM2.5
county_miss grid_miss n prop
0 0 17493 0.97
0 1 629 0.03

Same Day Association between Smoke and Death

Same analysis as hospitalizations where likelihood of a cardiopulmonary death is regressed a 10 ug/m^3 increase in smoke PM2.5. Model accounts for within subject variability and adjusts for temperature.

# same day results
same_day_death <- co_death_list %>% 
  map_dfr(., function(df){
    out_name <- unique(df$out_name)
    result <- broom::tidy(clogit(outcome ~ gpm_smk10unit + o3_8hr_max_ppb + 
                                 temp_f_grid + strata(id),
                                 data = df)) %>% 
      filter(term == 'gpm_smk10unit') %>% 
      dplyr::select(term, estimate, conf.low, conf.high) %>% 
      mutate_at(vars(estimate:conf.high), exp) %>% 
      mutate(outcome_name = out_name)
  }) %>% 
  mutate(outcome_name = forcats::fct_relevel(outcome_name, .$outcome_name),
         group = as.factor(ifelse(outcome_name %in% c('Respiratory', 'Asthma', 'COPD'),
                        'Respiratory', 'Cardiovascular')))

Mortality same day association plot.

death_plot <- ggplot(data=same_day_death, aes(x=outcome_name, y = estimate, 
                                              colour = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=conf.low, ymax=conf.high), width = 0.3) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Underlying Cause of Death") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75))

print(death_plot)

Note there are very few deaths with underlying cause of death as asthma and cardiac arrest and thus a lot of variability. I’m going to plot without these two outcomes so it’s easier to read.

Underlying cause of death plot I’ll use in the paper.

death_plot2 <- ggplot(data = filter(same_day_death, 
                                    !(outcome_name %in% c('Asthma', 'Cardiac Arrest'))),  
                                    aes(x=outcome_name, y = estimate, colour = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=conf.low, ymax=conf.high), width = 0.3) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Underlying Cause of Death") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75))

print(death_plot2)

No significant associations with a 10 ug/m^3 increase in smoke PM2.5 and cardiopulmonary morbidity.

Here is a table of the same-day mortality association.

knitr::kable(dplyr::select(same_day_death, outcome_name, estimate:conf.high) %>%
    mutate_at(vars(estimate:conf.high), ~round(.x, 3)),
             caption = "Same Day Association with Smoke and Mortality")
Same Day Association with Smoke and Mortality
outcome_name estimate conf.low conf.high
Respiratory 1.072 0.943 1.219
Asthma 1.746 0.788 3.868
COPD 1.014 0.862 1.192
CVD 1.024 0.944 1.111
Heart Failure 0.935 0.699 1.252
Cardiac Arrest 2.932 1.111 7.740
Ischemic Heart Disease 0.968 0.854 1.099
Myocardial Infarction 1.168 0.900 1.516
Cerebrovascular 1.135 0.940 1.370

Cumulative effect for a > 10 ug/m^3 of smoke exposure over 6 days on risk of death

Looking at cumulative exposure of 0 to 5 lagged days of smoke exposure on risk for cardiopulmonary death.

cod <- rep(cause_death, each = 12)

dl_death_results  <- lapply(co_death_list, function(x){
  # output dataframe from list
  data <- x %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3,
                             gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3,
                               temp_f_grid_lag4, temp_f_grid_lag5))
  
  # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3, 
                             o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(cod), .) 

Cumulative effect death.

cumulative_death_results <- filter(dl_death_results, 
                                   type == 'cumulative' & time == 5) %>% 
    dplyr::select(-strata, -time) %>% 
    mutate(outcome = forcats::fct_relevel(cod, cause_death),
         group = as.factor(if_else(cod %in% cause_death[1:3],
         "Respiratory", "Cardiovascular"))) %>% 
    filter(!(outcome %in% c('Asthma', 'Cardiac Arrest')))

Plot of cumulative effect.

# plot
cumulative_death_plot <- ggplot(data = cumulative_death_results, 
               aes(x=outcome, y = odds_ratio, color = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10 ", mu,"g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Outcome") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75,
                                   size = 10))

print(cumulative_death_plot)

Removed asthma and cardiac arrest as they were highly variable.

Table of the cumulative effects of smoke on underlying cause of death.

knitr::kable(dplyr::select(cumulative_death_results,outcome, odds_ratio:upper_95) %>%
    mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
             caption = "Cumulative Effect of Smoke on Mortality")
Cumulative Effect of Smoke on Mortality
outcome odds_ratio lower_95 upper_95
Respiratory 1.113 0.907 1.367
COPD 0.920 0.698 1.212
CVD 1.050 0.926 1.191
Heart Failure 0.752 0.475 1.189
Ischemic Heart Disease 1.023 0.839 1.247
Myocardial Infarction 1.055 0.699 1.593
Cerebrovascular 1.080 0.809 1.442

Distributed Lag Estiamtes for Mortality

Plot of daily estimates from the distributed lag model for mortality.

constrained_lag_results_death <- dl_death_results %>% 
  mutate(outcome = forcats::fct_relevel(cod, cause_death),
         group = as.factor(if_else(cod %in% cause_death[1:3],
         "Respiratory", "Cardiovascular"))) %>% 
  filter(!(outcome %in% c('Asthma', 'Cardiac Arrest'))) %>% 
  filter(type == 'lag')

# constrained distributed lag
dl_death_plot <- ggplot(data=constrained_lag_results_death, 
    aes(x=time, y=odds_ratio, group = group, color = group, fill = group)) +
  geom_line(size = 1) +
  geom_ribbon(aes(ymin = lower_95, ymax = upper_95), color = 'transparent',
              alpha = 0.5) +
  facet_wrap(~outcome, scales = 'free_y', ncol = 4) +
  scale_color_manual("Underlying Cause of Death", values = c("#ff00cc", "darkblue")) +
  scale_fill_manual("Underlying Cause of Death", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Lagged Days") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        legend.direction = 'horizontal',
        legend.position = 'bottom',
        panel.grid.minor = element_blank(),
        strip.background = element_blank())

print(dl_death_plot)

If you look at the effect sizes, I do think there may be some interesting associations with all cause respiratory and CVD. It also appears that maybe heart attacks and strokes could be increased on the day of smoke exposure.

But there is too much noise at this point I think to make any conclusions if there is an association or not with smoke and CVD or respiratory deaths in this data.

Daily distributed lagged estimates table.

knitr::kable(dplyr::select(constrained_lag_results_death, outcome, time:upper_95) %>%
    mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
             caption = 'Lagged Associations between Smoke and Underlying Cause of Death')
Lagged Associations between Smoke and Underlying Cause of Death
outcome time odds_ratio lower_95 upper_95
Respiratory 0 1.028 0.916 1.153
Respiratory 1 1.044 0.991 1.100
Respiratory 2 1.049 0.975 1.130
Respiratory 3 1.034 0.960 1.114
Respiratory 4 1.000 0.947 1.056
Respiratory 5 0.956 0.850 1.076
COPD 0 0.999 0.861 1.159
COPD 1 0.999 0.933 1.070
COPD 2 0.997 0.901 1.103
COPD 3 0.989 0.894 1.094
COPD 4 0.975 0.906 1.050
COPD 5 0.959 0.820 1.121
CVD 0 1.006 0.935 1.082
CVD 1 1.016 0.983 1.050
CVD 2 1.021 0.977 1.067
CVD 3 1.017 0.973 1.063
CVD 4 1.004 0.971 1.037
CVD 5 0.986 0.918 1.060
Heart Failure 0 0.991 0.765 1.285
Heart Failure 1 1.000 0.894 1.119
Heart Failure 2 0.996 0.847 1.171
Heart Failure 3 0.968 0.822 1.138
Heart Failure 4 0.917 0.809 1.039
Heart Failure 5 0.858 0.649 1.135
Ischemic Heart Disease 0 0.946 0.845 1.060
Ischemic Heart Disease 1 1.012 0.961 1.065
Ischemic Heart Disease 2 1.058 0.987 1.134
Ischemic Heart Disease 3 1.058 0.986 1.134
Ischemic Heart Disease 4 1.011 0.959 1.065
Ischemic Heart Disease 5 0.944 0.845 1.056
Myocardial Infarction 0 1.092 0.864 1.379
Myocardial Infarction 1 1.078 0.974 1.193
Myocardial Infarction 2 1.054 0.915 1.214
Myocardial Infarction 3 1.011 0.874 1.168
Myocardial Infarction 4 0.951 0.849 1.064
Myocardial Infarction 5 0.885 0.697 1.125
Cerebrovascular 0 1.082 0.914 1.281
Cerebrovascular 1 1.024 0.948 1.105
Cerebrovascular 2 0.983 0.887 1.090
Cerebrovascular 3 0.973 0.878 1.079
Cerebrovascular 4 0.992 0.922 1.067
Cerebrovascular 5 1.027 0.873 1.208

Unconstrained Lag

Unconstrained lag for mortality.

# repeat outcomes 4 times
ucod <- rep(cause_death, each = 6)
# day to repeat
day <- rep(0:5, times = 9)

# lag day vector
lag_days <- c('', '_lag1', '_lag2', '_lag3', '_lag4', '_lag5')

# map across lagged days
death_uc_lag_results <- map_dfr(co_death_list, function(df){
  data <- df
    results <- map_dfr(lag_days, function(x){
      # define function
      f <- as.formula(paste('outcome~',
        paste(paste0(c('gpm_smk10unit', 'o3_8hr_max_ppb', 'temp_f_grid'), 
                     x), collapse = '+'),
        '+strata(id)'))
      r <- broom::tidy(clogit(f, data = data)) %>% 
      filter(term == paste0('gpm_smk10unit',x)) %>% 
      dplyr::select(term, estimate, conf.low, conf.high) %>% 
      mutate_at(vars(estimate:conf.high), exp)
    })
  }) %>% 
  cbind(ucod, day, .) %>% 
  mutate(outcome = forcats::fct_relevel(ucod, cause_death),
         group = as.factor(if_else(outcome %in% cause_death[1:3],
         "Respiratory", "Cardiovascular"))) 

Plot for unconstrained lag.

death_uc_plot <- ggplot(data=death_uc_lag_results, aes(x=day, y=estimate, 
                                                group = group, color = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=conf.low, ymax=conf.high), width = 0.3) +
  facet_wrap(~outcome, scales = 'free_y', ncol = 4) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Lagged Days") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        legend.direction = 'horizontal',
        legend.position = 'bottom',
        panel.grid.minor = element_blank(),
        strip.background = element_blank())

print(death_uc_plot)

Table for unconstrained lag values.

knitr::kable(dplyr::select(death_uc_lag_results, ucod, day, estimate:conf.high) %>%
    mutate_at(vars(estimate:conf.high), ~round(.x, 3)),
             caption = 'Uncontrained Lag Association between Smoke and Underlying Cause of Death')
Uncontrained Lag Association between Smoke and Underlying Cause of Death
ucod day estimate conf.low conf.high
Respiratory 0 1.072 0.943 1.219
Respiratory 1 1.107 0.980 1.249
Respiratory 2 1.087 0.958 1.233
Respiratory 3 1.022 0.893 1.169
Respiratory 4 1.106 0.971 1.259
Respiratory 5 0.950 0.830 1.086
Asthma 0 1.746 0.788 3.868
Asthma 1 2.070 0.640 6.691
Asthma 2 0.455 0.094 2.193
Asthma 3 3.301 1.032 10.559
Asthma 4 1.845 0.585 5.825
Asthma 5 1.484 0.516 4.271
COPD 0 1.014 0.862 1.192
COPD 1 0.971 0.820 1.149
COPD 2 0.959 0.801 1.148
COPD 3 0.935 0.776 1.127
COPD 4 1.042 0.878 1.237
COPD 5 0.897 0.748 1.075
CVD 0 1.024 0.944 1.111
CVD 1 1.041 0.961 1.127
CVD 2 1.052 0.972 1.138
CVD 3 1.007 0.933 1.087
CVD 4 1.044 0.966 1.129
CVD 5 0.987 0.910 1.070
Heart Failure 0 0.935 0.699 1.252
Heart Failure 1 1.037 0.774 1.389
Heart Failure 2 1.005 0.767 1.318
Heart Failure 3 0.837 0.623 1.126
Heart Failure 4 0.763 0.548 1.061
Heart Failure 5 0.854 0.630 1.158
Cardiac Arrest 0 2.932 1.111 7.740
Cardiac Arrest 1 1.457 0.697 3.044
Cardiac Arrest 2 1.110 0.415 2.970
Cardiac Arrest 3 2.005 0.865 4.650
Cardiac Arrest 4 2.390 1.046 5.463
Cardiac Arrest 5 1.653 0.805 3.393
Ischemic Heart Disease 0 0.968 0.854 1.099
Ischemic Heart Disease 1 1.051 0.928 1.190
Ischemic Heart Disease 2 1.088 0.959 1.234
Ischemic Heart Disease 3 1.051 0.932 1.187
Ischemic Heart Disease 4 1.014 0.891 1.154
Ischemic Heart Disease 5 0.978 0.864 1.108
Myocardial Infarction 0 1.168 0.900 1.516
Myocardial Infarction 1 1.114 0.867 1.431
Myocardial Infarction 2 1.095 0.866 1.385
Myocardial Infarction 3 1.037 0.803 1.340
Myocardial Infarction 4 0.942 0.720 1.232
Myocardial Infarction 5 0.876 0.667 1.149
Cerebrovascular 0 1.135 0.940 1.370
Cerebrovascular 1 0.988 0.818 1.195
Cerebrovascular 2 0.980 0.816 1.176
Cerebrovascular 3 0.961 0.807 1.145
Cerebrovascular 4 1.132 0.961 1.334
Cerebrovascular 5 0.937 0.777 1.130

Sex-Specific Stratification

Using the distributed lag model for each sex strata. Also modifying code to pull out n cases analyzed.

# estimating lagged effects
ucod <- rep(cause_death, each = 24)

sex_strata <- c('F', 'M')

sex_death_results  <- map_dfr(co_death_list, function(df){
  results <- map_dfr(sex_strata, function(x){
   data <- df %>% 
     filter(sex == x) %>% 
     mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))
   
    # create lagged matrix
    pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3,
                             gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
    # temp matrix
    temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                                 temp_f_grid_lag2, temp_f_grid_lag3,
                                 temp_f_grid_lag4, temp_f_grid_lag5))
    
    # ozone matrix
    o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                               o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
                               o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
    
    # define lagged basis spline
    exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
    # pm basis
    pm_basis <- pm_mat %*% exp_b
    # temp basis
    temp_basis <- temp_mat %*% exp_b
    # ozone basis
    o3_basis <- o3_mat %*% exp_b
    # run lagged model
    lag_mod <- clogit(outcome ~ pm_basis + o3_basis + temp_basis + strata(id), 
                      data = data)
  
    # estimate lag estimate
    lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) %>% 
      mutate(sex = x)
    return(lag_est)
  }) 
}) %>% 
  # bind in outcome names
  bind_cols(data.frame(ucod), .) 

Sex-specific plots of the cumulative effect.

# subset to cumulative results
cumulative_sex_death_strata <- sex_death_results %>% 
  filter(type == 'cumulative' & time == 5) %>% 
  mutate(sex_long = if_else(sex == 'F', 'Female', 'Male'),
         outcome = forcats::fct_relevel(ucod, cause_death),
         group = as.factor(if_else(ucod %in% cause_death[1:3],
         "Respiratory", "Cardiovascular"))) %>% 
  filter(!(outcome %in% c('Asthma', 'Cardiac Arrest'))) 

# plot
sex_plot <- ggplot(data=cumulative_sex_death_strata, aes(x=sex_long, y=odds_ratio, 
                                        group = group, color = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
  facet_wrap(~outcome, scales = 'free_y', ncol = 4) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Sex") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        legend.direction = 'horizontal',
        legend.position = 'bottom',
        panel.grid.minor = element_blank(),
        strip.background = element_blank())

print(sex_plot)

Sex-specific estimates table.

knitr::kable(dplyr::select(cumulative_sex_strata, outcome, sex_long, odds_ratio:upper_95) %>%
    mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
             caption = 'Sex-specific cumulative effect of smoke on hosptializations.')
Sex-specific cumulative effect of smoke on hosptializations.
outcome sex_long odds_ratio lower_95 upper_95
All Respiratory Female 1.005 0.897 1.125
All Respiratory Male 1.055 0.939 1.185
Asthma Female 1.268 0.945 1.701
Asthma Male 1.292 0.954 1.749
COPD Female 1.008 0.743 1.369
COPD Male 1.095 0.807 1.487
Acute Bronchitis Female 1.190 0.629 2.250
Acute Bronchitis Male 1.683 0.937 3.022
Pneumonia Female 1.005 0.820 1.232
Pneumonia Male 1.066 0.863 1.316
All CVD Female 1.041 0.951 1.139
All CVD Male 1.037 0.952 1.130
Arrhythmia Female 1.229 0.980 1.541
Arrhythmia Male 0.950 0.751 1.203
Cerebrovascular Female 1.188 0.906 1.557
Cerebrovascular Male 1.178 0.975 1.423
Heart Failure Female 1.215 0.919 1.605
Heart Failure Male 1.185 0.979 1.434
Ischemic Heart Disease Female 1.071 0.859 1.334
Ischemic Heart Disease Male 1.035 0.833 1.287
Myocardial Infarction Female 1.060 0.881 1.275
Myocardial Infarction Male 0.883 0.715 1.090

Age-Specific Estimates

# repeat cause of death name 
ucod <- rep(cause_death[c(1,4)], each = 36)

# define age strata to loop through
age_strata <- c('age_under_15', 'age_15_to_65', 'age_over_65')

age_death_results  <- map_dfr(co_death_list[c(1,4)], function(df){
  results <- map_dfr(age_strata, function(x){
   data <- df %>% 
     filter(age_cat == x) %>% 
     mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))
   
    # create lagged matrix
    pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3,
                             gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
    # temp matrix
    temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                                 temp_f_grid_lag2, temp_f_grid_lag3,
                                 temp_f_grid_lag4, temp_f_grid_lag5))
    
    # ozone matrix
    o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                               o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
                               o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
    
    # define lagged basis spline
    exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
    # pm basis
    pm_basis <- pm_mat %*% exp_b
    # temp basis
    temp_basis <- temp_mat %*% exp_b
    # ozone basis
    o3_basis <- o3_mat %*% exp_b
    # run lagged model
    lag_mod <- clogit(outcome ~ pm_basis + o3_basis + temp_basis + strata(id), 
                      data = data)
  
    # estimate lag estimate
    lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) %>% 
      mutate(age_cat = x)
    return(lag_est)
  }) 
}) %>% 
  bind_cols(data.frame(ucod), .)

Age-specific plots of the cumulative effect.I’m going to cut out deaths under 15 because there aren’t many of them and the variance is high.

# subset to cumulative results
cumulative_age_death_strata <- age_death_results %>% 
  filter(type == 'cumulative' & time == 5) %>% 
  filter(age_cat != 'age_under_15') %>% 
  mutate(age_long = forcats::fct_relevel(
    case_when(age_cat == 'age_under_15' ~ 'Age < 15',
              age_cat == 'age_15_to_65' ~ 'Age 15 to 64',
              age_cat == 'age_over_65' ~ 'Age > 64'),
    c('Age < 15', 'Age 15 to 64', 'Age > 64')),
    outcome = forcats::fct_relevel(if_else(ucod == 'Respiratory', 'Respiratory',
                                           'Cardiovascular'), 
                                   c('Cardiovascular', 'Respiratory')))

# plot
age_plot <- ggplot(data=cumulative_age_death_strata, aes(x=age_long, y=odds_ratio, 
                                        group = outcome, color = outcome)) +
  geom_point() +
  geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
  facet_wrap(~outcome, scales = 'free_y', ncol = 4) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Age") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        legend.direction = 'horizontal',
        legend.position = 'bottom',
        panel.grid.minor = element_blank(),
        strip.background = element_blank())

print(age_plot)

Table of age-specific associations.

knitr::kable(dplyr::select(cumulative_age_death_strata, 
                    outcome, age_long, odds_ratio:upper_95) %>%
    mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
             caption = 'Age-specific associations between smoke and underlying cause of death')
Age-specific associations between smoke and underlying cause of death
outcome age_long odds_ratio lower_95 upper_95
Respiratory Age 15 to 64 0.886 0.487 1.608
Respiratory Age > 64 1.158 0.929 1.442
Cardiovascular Age 15 to 64 1.172 0.883 1.555
Cardiovascular Age > 64 1.021 0.887 1.176

Binary Smoke Classifier

The continuous definition of smoke may have some misclassification bias, particularly where there is difficulty distinguishing from PM2.5 impacted by smoke and a high anthropocentric PM2.5 day that also happens to have some smoke in the atmospheric column. I am going to try out a binary outcome of > 10 ug/m^3 PM2.5 with HMS overhead as a binary indicator for smoke and look at the association between multiple days of a binary smoke exposure and the risk of cardiopulmonary morbidity.

Hospitalizations and Binary Smoke

out_rep <- rep(outcome, each = 12)

hosp_binary_results  <- lapply(co_hosp_list, function(x){
  # output dataframe from list
  data <- x %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gsmk10_hms_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gsmk10_hms, gsmk10_hms_lag1, 
                             gsmk10_hms_lag2, gsmk10_hms_lag3, 
                             gsmk10_hms_lag4, gsmk10_hms_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3,
                               temp_f_grid_lag4, temp_f_grid_lag5))
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b

  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(out_rep), .) 
# get the cumulative effect over 6 days
cumulative_bin_results <- filter(hosp_binary_results, 
                                 type == 'cumulative' & time == 5) %>% 
    dplyr::select(-strata, -time) %>% 
    mutate(outcome = forcats::fct_relevel(outcome, out_order),
         group = as.factor(if_else(outcome %in% out_order[1:5],
         "Respiratory", "Cardiovascular")))

# plot
cbin_hosp_plot <- ggplot(data = cumulative_bin_results, 
               aes(x=outcome, y = odds_ratio, color = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: Smoke PM"[2.5], " > 10 ug/m"^3))) +
  xlab("Outcome") +
  ggtitle("Hospitalization Cumulative Smoke Exposure 0 to 5 Days") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75,
                                   size = 10))

print(cbin_hosp_plot)

There is a significant association with this binary cutoff and asthma over 0 to 5 days. Some of the cardiovascular outcomes that were trending towards ‘significance’ were attenuated to the null here.

Mortality

cod <- rep(cause_death, each = 12)

binary_death_results  <- lapply(co_death_list, function(x){
  # output dataframe from list
  data <- x %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gsmk10_hms, gsmk10_hms_lag1, 
                             gsmk10_hms_lag2, gsmk10_hms_lag3,
                             gsmk10_hms_lag4, gsmk10_hms_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3,
                               temp_f_grid_lag4, temp_f_grid_lag5))
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  temp_basis <-
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(cod), .) 
# get the cumulative effect over 6 days
c_bin_death_results <- filter(binary_death_results, 
                                   type == 'cumulative' & time == 5) %>% 
    dplyr::select(-strata, -time) %>% 
    mutate(outcome = forcats::fct_relevel(cod, cause_death),
         group = as.factor(if_else(cod %in% cause_death[1:3],
         "Respiratory", "Cardiovascular"))) %>% 
    filter(!(outcome %in% c('Asthma', 'Cardiac Arrest')))

# plot
cbin_death_plot <- ggplot(data = c_bin_death_results, 
               aes(x=outcome, y = odds_ratio, color = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: Smoke PM"[2.5], " > 10 ug/m"^3))) +
  xlab("Outcome") +
  ggtitle("Mortality Cumulative Smoke Exposure 0 to 5 Lagged Days") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75,
                                   size = 10))

print(cbin_death_plot)

No association with mortality.

Fire/Smoke Event-Specific

Looking at specific fires. I think isolating to specific counties impacted by specific fires gives me more confidence in the kriged models to understand the exposure series. We’ve also used this approach in Washington and Oregon studies. I’m going to run separate models in each section, but plot estimates side by side in a couple plots.

Since there are fewer deaths for some of the subcategories and I’m reducing further to specific times and counties, I’m going to only look at respiratory and cardiovascular general categories of underlying cause of death.

Four-Mile Canyon Fire 2010

The first fire I’ll look at is the Four-Mile Canyon Fire that occurred west of Boulder. It started on September 7, 2010 and contained on September 13th. I will limit the time frame to 2010-05-01 to 2010-10-31 and to Boulder County only. I’m open to adding additional counties known to be impacted by wildfire smoke.

out_rep <- rep(outcome, each = 12)

mile4_hosp_cont <-lapply(co_hosp_list, function(x){
  # output dataframe from list
  data <- x %>% 
    # filter to boulder
    filter(fips.x == '08013') %>% 
    filter(date >= '2010-05-01' & date <= '2010-10-31') %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3,
                             gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3,
                               temp_f_grid_lag4, temp_f_grid_lag5))
    # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
                             o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)
  
  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(out_rep), .) %>% 
  mutate(event = 'Four Mile 2010')
cod <- rep(names(co_death_list[c(1,4)]), each=12)

mile4_death_cont <- lapply(co_death_list[c(1,4)], function(x){
  # output dataframe from list
  x <- co_death_list[[1]]
  data <- x %>%     
    # filter to boulder
    filter(fips.x == '08013') %>% 
    filter(date >= '2010-05-01' & date <= '2010-10-31') %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3,
                             gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3,
                               temp_f_grid_lag4, temp_f_grid_lag5))
    # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
                             o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(cod), .) %>% 
  mutate(event = 'Four Mile 2010')

Waldo Canyon 2012

Waldo Canyon started on June 23rd 2012 and burned until July 10th 2012 near Colorado Springs. Limiting to events from El Paso County to assess impact of smoke. Are there other counties I should consider?

Limiting hospitalizations and deaths to May 2012 through August 2012. I’m consider expanding time frame and other counties too.

out_rep <- rep(outcome, each = 12)

waldo_hosp_cont <-lapply(co_hosp_list, function(x){
  # output dataframe from list
  data <- x %>% 
    # filter to el paso
    filter(fips.x == '08041') %>% 
    filter(date >= '2012-05-01' & date <= '2012-10-31') %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3,
                             gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3,
                               temp_f_grid_lag4, temp_f_grid_lag5))
    # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
                             o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(out_rep), .) %>% 
  mutate(event = 'Waldo Canyon 2012')
cod <- rep(names(co_death_list[c(1,4)]), each=12)

waldo_death_cont <- lapply(co_death_list[c(1,4)], function(x){
  # output dataframe from list
  data <- x %>%     
    # filter to el paso
    filter(fips.x == '08041') %>% 
    filter(date >= '2012-05-01' & date <= '2012-10-31') %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3,
                             gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3,
                               temp_f_grid_lag4, temp_f_grid_lag5))
    # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
                             o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(cod), .) %>% 
  mutate(event = 'Waldo Canyon 2012')

High Park Fire

For this fire, I’m limiting hospitalizations and deaths to May 2012 through August 2012 occurring in Larmier county.

out_rep <- rep(outcome, each = 12)

hipark_hosp_cont <-lapply(co_hosp_list, function(x){
  # output dataframe from list
  data <- x %>% 
    # filter to larimer
    filter(fips.x == '08069') %>% 
    filter(date >= '2012-05-01' & date <= '2012-10-31') %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))
  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3,
                             gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3,
                               temp_f_grid_lag4, temp_f_grid_lag5))
    # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
                             o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(out_rep), .) %>% 
  mutate(event = 'High Park 2012')
cod <- rep(names(co_death_list[c(1,4)]), each=12)

hipark_death_cont <- lapply(co_death_list[c(1,4)], function(x){
  # output dataframe from list
  data <- x %>%     
    # filter to el paso
    filter(fips.x == '08069') %>% 
    filter(date >= '2012-05-01' & date <= '2012-10-31') %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3,
                             gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3,
                               temp_f_grid_lag4, temp_f_grid_lag5))
    # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
                             o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(cod), .) %>% 
  mutate(event = 'High Park 2012')

2012 Local Smoke

2012 local fires for all grid cells in study domain.

out_rep <- rep(outcome, each = 12)

local_hosp_cont <-lapply(co_hosp_list, function(x){
  # output dataframe from list
  data <- x %>% 
    filter(date >= '2012-05-01' & date <= '2012-10-31') %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3,
                             gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3,
                               temp_f_grid_lag4, temp_f_grid_lag5))
    # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
                             o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(out_rep), .) %>% 
  mutate(event = 'Local 2012')
cod <- rep(names(co_death_list[c(1,4)]), each=12)

local_death_cont <- lapply(co_death_list[c(1,4)], function(x){
  # output dataframe from list
  data <- x %>%     
    filter(date >= '2012-05-01' & date <= '2012-10-31') %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3,
                             gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3,
                               temp_f_grid_lag4, temp_f_grid_lag5))
    # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
                             o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(cod), .) %>% 
  mutate(event = 'Local 2012')

2015 Transported Smoke

Fires in Canada and Washington led to the transported smoke event that affected the Front Range communities in Colorado in 2015. I have limited this analysis to 2015-05-01 to 2015-10-31 (note hospitalizations only go until 2015-10-01). I included all Front Range counties.

out_rep <- rep(outcome, each = 12)

trans_hosp_cont <-lapply(co_hosp_list, function(x){
  # output dataframe from list
  data <- x %>% 
    filter(date >= '2015-05-01' & date <= '2015-10-31') %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3,
                             gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3,
                               temp_f_grid_lag4, temp_f_grid_lag5))
    # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
                             o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(out_rep), .) %>% 
  mutate(event = 'Transport 2015')
cod <- rep(names(co_death_list[c(1,4)]), each=12)

trans_death_cont <- lapply(co_death_list[c(1,4)], function(x){
  # output dataframe from list
  data <- x %>%     
    filter(date >= '2015-05-01' & date <= '2015-10-31') %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3,
                             gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3,
                               temp_f_grid_lag4, temp_f_grid_lag5))
    # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
                             o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(cod), .) %>% 
  mutate(event = 'Transport 2015')

Different Event and Hospitalization Plots

Plots of association for each hospitalization outcome by event. I left out Four Mile Canyon fire since there were so few events the estimates were unstable. This can probably be fixed by adding in other counties that were impacted by smoke.

# bind event dataframes together
hosp_event <- bind_rows(waldo_hosp_cont, hipark_hosp_cont, local_hosp_cont,
                        trans_hosp_cont) %>% 
  filter(type == 'cumulative' & time == 5) %>% 
  dplyr::select(-strata, -time) %>% 
  mutate(outcome = forcats::fct_relevel(out_rep, out_order),
         group = as.factor(if_else(outcome %in% out_order[1:5],
         "Respiratory", "Cardiovascular")),
         # ordering fire event
         event = forcats::fct_relevel(event, c('Waldo Canyon 2012', 'High Park 2012',
                                               'Local 2012', 'Transport 2012'))) %>% 
  # filter out acute bronchitis
  filter(outcome != 'Acute Bronchitis')

# plot
hosp_event_plot <- ggplot(data = hosp_event, 
                          aes(x=outcome, y=odds_ratio, color = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  facet_wrap(~event, scales = 'free_y') +  
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Outcome") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        panel.grid.minor = element_blank(),        
        legend.direction = 'horizontal',
        legend.position = 'bottom',
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75,
                                   size = 10))
  
print(hosp_event_plot)

I think this plot is kind of interesting for asthma, where there is a positive association with the 2015 transport smoke and an inverse association with the 2012 local fires for the study domain. I don’t see an association with the individual fires, which may suggest the inverse association may be due to exposure misclassification? Or it could be real where poeple are taking risk-avoiding behaviors.I think Sheryl and I hypothesized that there might be an effect here because the first couple days of the smoke event were really hard to tell there was actual smoke and not fog. Also note that I included all front range counties because there were air quality warning up and down the front range. I think we could also do a 2012 to 2015 comparison. This might actually be the cleanest way to do this analysis, with the argument that the Front Range was impacted by smoke from two large fires in 2012 and impacted by smoke from transported smoke.

I left out Four Mile Canyon fire for now since the estimates were unstable due to few events.

Printing out fire events hospitalization table.

knitr::kable(dplyr::select(hosp_event, event, outcome, odds_ratio:upper_95) %>%
    mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
             caption = 'Cumulative association between smoke from specific events and hospitalizations')
Cumulative association between smoke from specific events and hospitalizations
event outcome odds_ratio lower_95 upper_95
Waldo Canyon 2012 All Respiratory 1.155 0.767 1.741
Waldo Canyon 2012 Asthma 0.622 0.203 1.906
Waldo Canyon 2012 COPD 1.056 0.360 3.092
Waldo Canyon 2012 Pneumonia 1.549 0.754 3.182
Waldo Canyon 2012 All CVD 1.032 0.746 1.429
Waldo Canyon 2012 Arrhythmia 0.769 0.301 1.967
Waldo Canyon 2012 Cerebrovascular 1.007 0.458 2.214
Waldo Canyon 2012 Heart Failure 0.988 0.448 2.182
Waldo Canyon 2012 Ischemic Heart Disease 1.291 0.554 3.006
Waldo Canyon 2012 Myocardial Infarction 0.713 0.331 1.534
High Park 2012 All Respiratory 1.089 0.730 1.624
High Park 2012 Asthma 0.635 0.138 2.917
High Park 2012 COPD 1.107 0.322 3.812
High Park 2012 Pneumonia 1.555 0.827 2.923
High Park 2012 All CVD 1.020 0.752 1.384
High Park 2012 Arrhythmia 1.209 0.550 2.656
High Park 2012 Cerebrovascular 1.178 0.575 2.414
High Park 2012 Heart Failure 1.331 0.644 2.750
High Park 2012 Ischemic Heart Disease 0.954 0.409 2.226
High Park 2012 Myocardial Infarction 1.159 0.623 2.158
Local 2012 All Respiratory 0.944 0.827 1.079
Local 2012 Asthma 0.711 0.495 1.022
Local 2012 COPD 0.920 0.650 1.301
Local 2012 Pneumonia 1.105 0.871 1.402
Local 2012 All CVD 1.011 0.913 1.120
Local 2012 Arrhythmia 1.079 0.834 1.396
Local 2012 Cerebrovascular 1.132 0.870 1.472
Local 2012 Heart Failure 1.186 0.908 1.549
Local 2012 Ischemic Heart Disease 1.024 0.782 1.339
Local 2012 Myocardial Infarction 0.986 0.784 1.239
Transport 2015 All Respiratory 1.052 0.921 1.201
Transport 2015 Asthma 1.614 1.154 2.256
Transport 2015 COPD 0.991 0.669 1.467
Transport 2015 Pneumonia 0.886 0.690 1.140
Transport 2015 All CVD 1.115 1.011 1.230
Transport 2015 Arrhythmia 1.121 0.857 1.466
Transport 2015 Cerebrovascular 1.117 0.879 1.420
Transport 2015 Heart Failure 1.116 0.875 1.424
Transport 2015 Ischemic Heart Disease 1.149 0.905 1.457
Transport 2015 Myocardial Infarction 1.114 0.897 1.384

Plotting underlying cause of death by fire events.

# bind event dataframes together
death_event <- bind_rows(waldo_death_cont, hipark_death_cont, local_death_cont,
                        trans_death_cont) %>% 
  filter(type == 'cumulative' & time == 5) %>% 
  dplyr::select(-strata, -time) %>% 
  mutate(outcome = as.factor(if_else(cod == 'resp', 
                                     'Respiratory', 'Cardiovascular')),
         # ordering fire event
         event = forcats::fct_relevel(event, c('Waldo Canyon 2012', 'High Park 2012',
                                               'Local 2012', 'Transport 2012'))) 

# plot
death_event_plot <- ggplot(data = death_event, 
                          aes(x=outcome, y=odds_ratio, color = outcome)) +
  geom_point() +
  geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  facet_wrap(~event) +  
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Outcome") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        panel.grid.minor = element_blank(),        
        legend.direction = 'horizontal',
        legend.position = 'bottom',
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75,
                                   size = 10))
  
print(death_event_plot)

No association with deaths.

Table for mortality for specific events.

knitr::kable(dplyr::select(death_event, outcome, event,odds_ratio:upper_95) %>%
    mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
             caption = 'Cumulative association with smoke and mortality for specific events')
Cumulative association with smoke and mortality for specific events
outcome event odds_ratio lower_95 upper_95
Respiratory Waldo Canyon 2012 0.750 0.218 2.578
Cardiovascular Waldo Canyon 2012 1.294 0.678 2.472
Respiratory High Park 2012 0.220 0.051 0.950
Cardiovascular High Park 2012 0.770 0.409 1.447
Respiratory Local 2012 0.998 0.702 1.419
Cardiovascular Local 2012 1.086 0.878 1.343
Respiratory Transport 2015 1.024 0.741 1.415
Cardiovascular Transport 2015 0.969 0.796 1.180

Conclusions

I think there could be something interesting looking at 2012 vs. 2015 smoke, where transported smoke could be a risk factor. The hypothesis here could be that people are aware of smoke from local fires since they are publicized, where the transported smoke was not clear to people until there were air quality warnings. Mortality doesn’t seem to be associated with these short-term smoke events.